knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(devtools)Loading required package: usethis
library(ExPanDaR)Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'data.table':
method from
print.data.table
library(knitr)
library(tidyverse)Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages --------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.0 v purrr 0.3.4
v tibble 3.0.1 v dplyr 1.0.2
v tidyr 1.0.3 v stringr 1.4.0
v readr 1.3.1 v forcats 0.5.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(modelr)
library(broom)
Attaching package: 㤼㸱broom㤼㸲
The following object is masked from 㤼㸱package:modelr㤼㸲:
bootstrap
library(data.table)data.table 1.12.8 using 2 threads (see ?getDTthreads). Latest news: r-datatable.com
Attaching package: 㤼㸱data.table㤼㸲
The following objects are masked from 㤼㸱package:dplyr㤼㸲:
between, first, last
The following object is masked from 㤼㸱package:purrr㤼㸲:
transpose
library(readxl)
#Install and load older versions of the following packages
#remove.packages("REAT")
#install_version("REAT", version = "2.1.1", repos = "http://cran.us.r-project.org")
library(REAT)
Attaching package: 㤼㸱REAT㤼㸲
The following object is masked from 㤼㸱package:data.table㤼㸲:
shift
library(forecast)Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
# Change the presentation of decimal numbers to 4 and avoid scientific notation
options(prompt="R> ", digits=8, scipen=999)Via the tidyverse package
library(readr)
long_master_nona <- read_csv("01-raw-data/noNA_long_all_data_master_health_violence_services_education.csv", locale = locale(encoding = "ISO-8859-1"), col_types = cols(NDVr = col_double(),
eb_ndvr = col_double(), eb_npir = col_double(),
nimr = col_double(), Litr= col_double(),
nmrpr = col_double(), nmpr = col_double() ))
wide_master_nona <- read_csv("01-raw-data/noNA_wide_data_master_health_violence_services_education.csv", locale = locale(encoding = "ISO-8859-1"))
cv<- read_csv( "03-data/code04_timeseries_cv.csv")
cv<-cv[,-1]
cvy<- read_csv( "03-data/code04_timeseries_cv_yearly.csv")
cvy<-cvy[,-1]Selecting variables needed
wide_master_nonais the speed of convergence changing over time?
nmr<- wide_master_nona %>%
select(2:5,starts_with("eb_nmr"))
nmralpha=c()
lambda=c()
beta_coefficient=c()
half_life=c()
p_value=c()
years=c()
list_beta=list()
year_final=c()
for (i in 1:12) {
tab<-nmr%>%
select(5:20)
tab<- tab %>%
select(i,i+4)
name1<- as.character(colnames(tab)[1])
name2<- as.character(colnames(tab)[2])
colnames(tab)[1]<-"FY_yo"
colnames(tab)[2]<-"FY_yt"
tab
beta <- betaconv.ols (tab$FY_yo, i, tab$FY_yt, i+4,
beta.plot = FALSE,
beta.plotLine = TRUE,
beta.plotX = paste("Ln (non murder rate in", name1),
beta.plotY = paste("Annual Growth Rate",name1,name2),
beta.plotTitle = paste(name1,name2),
beta.plotLineCol = "purple",output.results = FALSE)
alpha[i]<-as.data.frame(beta[["abeta"]])[1,1]
beta_coefficient[i]<-as.data.frame(beta[["abeta"]])[2,1]
lambda[i]<-as.data.frame(beta[["abeta"]])[3,1]
half_life[i]<-as.data.frame(beta[["abeta"]])[4,1]
p_value[i]<-as.data.frame(beta[["abeta"]])[2,4]
years[i]<- paste(substr(name1,nchar(name1)-3,nchar(name1)),substr(name2,nchar(name2)-3,nchar(name2)),sep="-")
year_final[i]<- as.integer(substr(name2,nchar(name2)-3,nchar(name2)),sep="-")
list_beta[[i]]<- beta
}
nmr_beta<- data.frame(years,year_final, alpha,beta_coefficient,lambda, half_life,p_value)nmr_betanmr_betanmr_beta %>%
#ggplot(aes(x=year_final, y=beta_coefficient))+
ggplot(aes(x=year_final, y=alpha)) +
geom_point()+
geom_smooth(method="lm")nmr_beta %>%
#ggplot(aes(x=year_final, y=beta_coefficient))+
ggplot(aes(x=year_final, y=lambda)) +
geom_point()+
geom_smooth(method="lm")
nmr_beta %>%
ggplot(aes(x=year_final, y=beta_coefficient))+
#ggplot(aes(x=year_final, y=lambda)) +
geom_point()+
geom_smooth(method="lm")
lm_nmr_beta <- lm(beta_coefficient~year_final, nmr_beta)
lm_nmr_beta %>% tidy()newd <- data.frame(year_final=2022)
beta_2022<- predict(lm_nmr_beta,newd)
lm_nmr_lambda <- lm(lambda~year_final, nmr_beta)
lm_nmr_lambda %>% tidy()newd <- data.frame(year_final=2022)
lambda_2022<- predict(lm_nmr_lambda,newd)
lm_nmr_alpha <- lm(alpha~year_final, nmr_beta)
lm_nmr_alpha %>% tidy()newd <- data.frame(year_final=2022)
alpha_2022<- predict(lm_nmr_alpha,newd)
alpha_2022 1
1.0397204
beta_2022 1
-0.11288337
lambda_2022 1
-0.030322844
final_coeff<- nmr_beta %>% select(-1)
final_coeffwrite.csv(final_coeff, "03-data/code04_table_pred.csv")#long_master_nona
long_master_nona %>%
select(1:6,eb_nmr) %>%
filter(year==2018) %>%
summarise(mean(eb_nmr))
#target 23,23 per 100.000 people
target<- 10000-(23.23/10)
target[1] 9997.677
nmr_2018 <- long_master_nona %>%
select(code,year,eb_nmr) %>%
filter(year==2018) %>%
filter(eb_nmr < target)
nmr_2018
y2022=c()
y2022 <- exp(alpha_2022+((1+beta_2022)*log(nmr_2018$eb_nmr)))
#y2022
nmr_2018$NMr2022 <- y2022
nmr_2018 %>%
filter(NMr2022<target)We would like to forecast the value of NMR for the year 2022 but having data up to 2018, that is h=4. we could try to use as a training set data from 2003 to 2014 in order to predidct the value of 2018 and compare it with the real value
nmr_beta_14<- nmr_beta %>%
filter(year_final<=2014)
nmr_beta_14nmr_beta_14 %>%
#ggplot(aes(x=year_final, y=beta_coefficient))+
ggplot(aes(x=year_final, y=alpha)) +
geom_point()+
geom_smooth(method="lm")nmr_beta_14 %>%
#ggplot(aes(x=year_final, y=beta_coefficient))+
ggplot(aes(x=year_final, y=lambda)) +
geom_point()+
geom_smooth(method="lm")
nmr_beta_14 %>%
ggplot(aes(x=year_final, y=beta_coefficient))+
#ggplot(aes(x=year_final, y=lambda)) +
geom_point()+
geom_smooth(method="lm")
lm_nmr_beta_14 <- lm(beta_coefficient~year_final, nmr_beta_14)
lm_nmr_beta_14 %>% tidy()newd <- data.frame(year_final=2018)
beta_2018<- predict(lm_nmr_beta_14,newd)
lm_nmr_lambda <- lm(lambda~year_final, nmr_beta_14)
lm_nmr_lambda %>% tidy()newd <- data.frame(year_final=2018)
lambda_2018<- predict(lm_nmr_lambda,newd)
lm_nmr_alpha <- lm(alpha~year_final, nmr_beta_14)
lm_nmr_alpha %>% tidy()newd <- data.frame(year_final=2018)
alpha_2018<- predict(lm_nmr_alpha,newd)
alpha_2018 1
1.3045419
beta_2018 1
-0.14163641
lambda_2018 1
-0.070536311
We can now compute the forecast for 2018 and compare it to the actual value
#long_master_nona
nmr_2018 <- wide_master_nona %>%
select(code,eb_nmr_2014, eb_nmr_2018)
nmr_2018
y2018=c()
y2018 <- exp(alpha_2018+((1+beta_2018)*log(nmr_2018$eb_nmr_2014)))
#y2022
nmr_2018$fc2018 <- y2018
nmr_2018 <- nmr_2018 %>%
mutate(forecast_error=eb_nmr_2018-fc2018)
nmr_2018 %>% arrange(forecast_error)NAmean((nmr_2018$forecast_error)^2)[1] 7.8302761
nmr_betaalphax=c()
betax=c()
lambdax=c()
nmr_betafor (x in seq_along(2011:2018)) {
ye=2007+(x-1)
nmr_betax<- nmr_beta %>%
filter(year_final<=ye)
lm_nmr_betax <- lm(beta_coefficient~year_final, nmr_betax)
lm_nmr_betax %>% tidy()
newd <- data.frame(year_final=ye+4)
beta_x4<- predict(lm_nmr_betax,newd)
lm_nmr_lambda <- lm(lambda~year_final, nmr_betax)
lm_nmr_lambda %>% tidy()
newd <- data.frame(year_final=ye+4)
lambda_x4<- predict(lm_nmr_lambda,newd)
lm_nmr_alpha <- lm(alpha~year_final, nmr_betax)
lm_nmr_alpha %>% tidy()
newd <- data.frame(year_final=ye+4)
alpha_x4<- predict(lm_nmr_alpha,newd)
alphax[x]= alpha_x4
betax[x]=beta_x4
lambda[x]=lambda_x4
}alphax[1] 7.80337836 1.54458092 1.28866776 1.90273287
[5] 2.70741957 1.49217308 0.57446011 1.30454185
nmr_xx <- wide_master_nona %>%
select(code,starts_with("eb_nmr"))
nmr_xx
fc_nmr=c()
for (jj in 1:8) {
fc_nmr <- exp(alphax[jj]+((1+betax[[jj]])*log(nmr_xx[[jj+5]])))
nmr_xx<- cbind(nmr_xx, fc_nmr)
colnames(nmr_xx)[ncol(nmr_xx)]<-paste("fc",colnames(nmr_xx)[jj+9],sep="")
}
nmr_xxnmr_xxx<- nmr_xx %>%
select(10:25)
year=c(2011:2018)
forecast_error=c()
nmr_xxxfor (i in seq_along(year)) {
forecast_error= (nmr_xxx[[i]]-nmr_xxx[[i+8]])^2
nmr_xxx<- cbind(nmr_xxx, forecast_error)
colnames(nmr_xxx)[ncol(nmr_xxx)]<- paste("for.error", as.character(year[i]), sep="")
}nmr_xxxerror<-nmr_xxx %>%
select(17:24)
rmsev<-mean(as.matrix(error))
rmsev= sqrt(rmsev)#cv
#RMSE
#error
rmse<-error %>%
summarise_all(sum)
rmse<-sqrt(rmse/1120)
rmse
#MAE
mae_y <- sqrt(error)
#mae_y
mae_y<- mae_y%>%
summarise_all(sum)
mae_y<- mae_y/1120
mae_y
cvycv_yearly<- rbind(mae_y, rmse)
methodv=c("BETA (MAE)","BETA (RMSE)")
names=seq(2011,2018,1)
names= as.character(names)
colnames(cv_yearly)<-names
cv_yearly<- cv_yearly %>%
mutate(method=methodv) %>%
select(method, everything())
cv_yearlycvy2<- rbind(cvy, cv_yearly)
write.csv(cvy2, "03-data/code04_timeseries_cv_yearly.csv")the RMSE is sqrt(6.1883245)
errormae<-mean(as.matrix(sqrt(error)))
mae[1] 1.6581373
cv<-rbind(cv, data.frame(method= c("Beta"), MAE= mae, RMSE=rmsev))
cvwrite.csv(cv, "03-data/code04_timeseries_cv.csv")nmr_beta nmr_beta %>%
#ggplot(aes(x=year_final, y=beta_coefficient))+
ggplot(aes(x=year_final, y=alpha)) +
geom_point()+
geom_smooth(method="lm")nmr_beta %>%
#ggplot(aes(x=year_final, y=beta_coefficient))+
ggplot(aes(x=year_final, y=lambda)) +
geom_point()+
geom_smooth(method="lm")
nmr_beta %>%
ggplot(aes(x=year_final, y=beta_coefficient))+
#ggplot(aes(x=year_final, y=lambda)) +
geom_point()+
geom_smooth(method="lm")for forecasting nmr a prediction of beta and apha is needed can the forecasting of these coefficients be improved?
alpha_ts<- ts(nmr_beta$alpha, start=2007)
beta_ts<- ts(nmr_beta$beta_coefficient, start=2007)
autoplot(alpha_ts)autoplot(beta_ts)
ggAcf(alpha_ts)ggAcf(beta_ts)
fita_arima <- auto.arima(alpha_ts)
fitb_arima <- auto.arima(beta_ts)
fita_ets <- ets(alpha_ts)
fitb_ets <- ets(beta_ts)as it can be seen below, beta and alpha may not be predicted using arima or ets models since the trend is not considered.
fita_arima %>% forecast(h=4) %>% autoplot()fitb_arima %>% forecast(h=4) %>% autoplot()fita_ets %>% forecast(h=4) %>% autoplot()fitb_ets %>% forecast(h=4) %>% autoplot()NA
NAsessionInfo()R version 4.0.4 (2021-02-15)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets
[6] methods base
other attached packages:
[1] forecast_8.12 REAT_2.1.1
[3] readxl_1.3.1 data.table_1.12.8
[5] broom_0.5.6 modelr_0.1.7
[7] forcats_0.5.0 stringr_1.4.0
[9] dplyr_1.0.2 purrr_0.3.4
[11] readr_1.3.1 tidyr_1.0.3
[13] tibble_3.0.1 ggplot2_3.3.0
[15] tidyverse_1.3.0 knitr_1.28
[17] ExPanDaR_0.5.3 devtools_2.3.0
[19] usethis_1.6.1
loaded via a namespace (and not attached):
[1] nlme_3.1-152 fs_1.4.1
[3] xts_0.12-0 lubridate_1.7.8
[5] httr_1.4.1 rprojroot_1.3-2
[7] tools_4.0.4 backports_1.1.6
[9] R6_2.4.1 DT_0.13
[11] mgcv_1.8-33 DBI_1.1.0
[13] colorspace_1.4-1 nnet_7.3-15
[15] withr_2.2.0 tidyselect_1.1.0
[17] prettyunits_1.1.1 tictoc_1.0
[19] processx_3.4.2 curl_4.3
[21] compiler_4.0.4 cli_2.0.2
[23] rvest_0.3.5 xml2_1.3.2
[25] desc_1.2.0 labeling_0.3
[27] tseries_0.10-47 scales_1.1.1
[29] lmtest_0.9-37 fracdiff_1.5-1
[31] quadprog_1.5-8 callr_3.4.3
[33] askpass_1.1 digest_0.6.25
[35] foreign_0.8-81 rio_0.5.16
[37] pkgconfig_2.0.3 htmltools_0.4.0
[39] sessioninfo_1.1.1 dbplyr_1.4.3
[41] fastmap_1.0.1 TTR_0.23-6
[43] htmlwidgets_1.5.1 rlang_0.4.7
[45] quantmod_0.4.17 rstudioapi_0.11
[47] shiny_1.4.0.2 farver_2.0.3
[49] generics_0.0.2 zoo_1.8-8
[51] jsonlite_1.7.1 zip_2.0.4
[53] magrittr_1.5 Matrix_1.3-2
[55] Rcpp_1.0.4.6 munsell_0.5.0
[57] fansi_0.4.1 shinycssloaders_0.3
[59] lifecycle_0.2.0 stringi_1.4.6
[61] pkgbuild_1.0.7 grid_4.0.4
[63] parallel_4.0.4 promises_1.1.0
[65] crayon_1.3.4 lattice_0.20-41
[67] splines_4.0.4 haven_2.2.0
[69] hms_0.5.3 ps_1.3.2
[71] pillar_1.4.4 pkgload_1.0.2
[73] urca_1.3-0 reprex_0.3.0
[75] glue_1.4.0 remotes_2.1.1
[77] vctrs_0.3.2 httpuv_1.5.2
[79] testthat_2.3.2 cellranger_1.1.0
[81] gtable_0.3.0 openssl_1.4.1
[83] assertthat_0.2.1 xfun_0.22
[85] openxlsx_4.1.5 mime_0.9
[87] xtable_1.8-4 later_1.0.0
[89] timeDate_3043.102 memoise_1.1.0
[91] ellipsis_0.3.0